from lxml import etree
import copy,html,re,time
import json,urllib.parse,psutil,os,openpyxl

# JSON blob embedded in the page as jQuery.parseJSON('...'); holds image/video data.
pat_img=re.compile(r"var obj \= jQuery\.parseJSON\('(.*)'\);$",re.M)
# Twister "dataToReturn" object holding SKU dimension/variation data.
patSizes=re.compile(r"P\.register\('twister-js-init-dpx-data', function\(\) \{.*?var dataToReturn = (\{.*?\});",re.S)
# Dollar amount, e.g. "$12.99" -> captures "12.99".
pat_price=re.compile(r'\$([0-9]+(?:\.[0-9]+)?)')
# Best Sellers Rank number (commas allowed), e.g. "#56,004".
pat_rank=re.compile(r'Best Sellers Rank:\s*</span>\s*#([\d,]+)')
# Review count, e.g. "1,234 ratings".
pat_rating=re.compile(r'([\d,]+) rating[s]?')
# Image size token (_SRnnn,nnn_) — substituted away to request the large image.
pat_bigimg=re.compile(r'_SR\d+,\d+_')

def add_query_parameters(base_url, params):
    """Merge *params* into the query string of *base_url*.

    Query parameters already present in the URL are preserved; keys that
    also appear in *params* are overridden by the new values.

    Args:
        base_url (str): URL to augment.
        params (dict): Mapping of parameter names to values.

    Returns:
        str: The URL rebuilt with the merged query string.
    """
    parts = urllib.parse.urlparse(base_url)

    # parse_qs yields {name: [values]}; update() lets new params win.
    merged = urllib.parse.parse_qs(parts.query)
    merged.update(params)

    # doseq=True expands list values into repeated key=value pairs.
    rebuilt_query = urllib.parse.urlencode(merged, doseq=True)

    return urllib.parse.urlunparse(parts._replace(query=rebuilt_query))

async def repeat_goto(page, url, t=3):
    """Navigate *page* to *url*, retrying on failure up to *t* times.

    Stops at the first successful goto; failures are logged and retried.
    Returns None whether or not navigation eventually succeeded.
    """
    for attempt in range(1, t + 1):
        try:
            await page.goto(url)
            return
        except Exception as e:
            print(f'浏览器goto错误 => {e},正在重试{attempt}次')


def killjc(jcname):
    """Force-kill all processes whose image name equals *jcname* (Windows only).

    Scans the process table; on the first match it runs ``taskkill /F /IM``,
    which terminates EVERY process with that image name at once, so the scan
    stops there. (The original looped on, re-issuing taskkill for each
    already-killed pid.)

    Args:
        jcname (str): process image name, e.g. 'chrome.exe'.
    """
    for pid in psutil.pids():
        try:
            p = psutil.Process(pid)
            if p.name() == f'{jcname}':
                print(f'关闭{pid} => {p.name()}')
                os.system(f'taskkill /F /IM {jcname}')
                return  # taskkill already removed every matching instance
        except Exception:
            # Process may have exited between pids() and Process(); skip it.
            pass

async def myawait(page, xpath, t=30):
    """Poll *page* for an xpath match roughly once per second.

    Makes up to *t* attempts, sleeping 1000 ms between them.

    Returns:
        The non-empty element list on the first hit, or 0 after *t*
        fruitless attempts (timeout sentinel).
    """
    attempt = 0
    while attempt < t:
        attempt += 1
        try:
            hits = await page.xpath(xpath)
            if hits:
                return hits
            await page.waitFor(1000)
        except Exception:
            # Swallow transient page errors; just wait out the tick so the
            # retry cadence matches the empty-result path.
            await page.waitFor(1000)
    return 0
    
async def page_scroll_bottom(page,w,h,flag=False):
    """Drag-scroll the page toward the bottom, up to 3 passes.

    Each pass presses the mouse near the top-right edge, drags it 50px past
    the viewport bottom, releases, then waits 1s for content to load.

    Args:
        page: browser page object (pyppeteer-style mouse/xpath API).
        w: viewport width in pixels; drag runs 10px in from the right edge.
        h: viewport height in pixels.
        flag: when True, stop after the first pass (see NOTE below).
    """
    for jj in range(3):
        await page.mouse.move(w-10,20)
        await page.mouse.down()
        await page.mouse.move(w-10,h+50, {'steps': 20})
        await page.mouse.up()
        await page.waitFor(1000)
        # Count loaded result items to decide whether another pass is needed.
        els=await page.xpath('//div[@data-sqe="item"]/a[1]')
        if not flag:
            # Keep scrolling until a full page of 60 items has loaded.
            if len(els)>=60:
                break
        else:
            # NOTE(review): len(els) >= 0 is always true, so flag=True always
            # breaks after the first pass — confirm this is intentional.
            if len(els)>=0:
                break

def kchrome():
    """Force-kill every running chrome.exe process (Windows only).

    ``taskkill /F /IM chrome.exe`` terminates ALL chrome.exe instances in
    one shot, so the process-table scan stops after the first match. (The
    original kept looping, re-issuing taskkill per already-killed pid.)
    """
    for pid in psutil.pids():
        try:
            if psutil.Process(pid).name() == 'chrome.exe':
                os.system('taskkill /F /IM chrome.exe')
                return  # one invocation already killed every instance
        except Exception:
            # Process vanished between enumeration and inspection; skip it.
            pass

def get_config(path='config.json'):
    """Load and return the JSON configuration stored at *path*.

    Args:
        path (str): Path to a JSON file (defaults to 'config.json').

    Returns:
        The deserialized JSON value (typically a dict).
    """
    # Binary mode lets json.load perform its own BOM/encoding detection.
    # (Original body carried thousands of junk trailing spaces — removed.)
    with open(path, 'rb') as f:
        return json.load(f)

def delempty2(fpath):
    """Trim trailing empty rows/columns from the first sheet of an xlsx file.

    Rows are dropped from the bottom while column 1 of the last row is empty;
    columns are dropped from the right while the header cell (row 1) of the
    last column is empty. The workbook is NOT saved back to disk.

    Args:
        fpath (str): path to the .xlsx workbook.

    Returns:
        The trimmed first worksheet (openpyxl worksheet object).
    """
    wb=openpyxl.load_workbook(fpath)
    ws=wb[wb.sheetnames[0]]

    # Guard max_row > 1: on a fully blank sheet the original looped forever,
    # because delete_rows on a 1-row sheet leaves max_row at 1 and the
    # sentinel cell never becomes truthy.
    while ws.max_row > 1 and not ws.cell(ws.max_row,1).value:
        ws.delete_rows(ws.max_row)
    #print(f'删除空白行后,剩余{ws.max_row}')

    # Same guard for the column sweep.
    while ws.max_column > 1 and not ws.cell(1,ws.max_column).value:
        ws.delete_cols(ws.max_column)
    #print(f'删除空白列后,剩余{ws.max_column}')
    # wb.save(fpath)
    return ws

def getDictDatasByEecel(fpath):
    """Read the (trimmed) first sheet of *fpath* into a list of dicts.

    Row 1 supplies the keys; each following row becomes one dict mapping
    header -> cell value.

    Args:
        fpath (str): path to the .xlsx workbook.

    Returns:
        list[dict]: one dict per data row.
    """
    ws = delempty2(fpath)

    cols = range(1, ws.max_column + 1)
    headers = [ws.cell(1, c).value for c in cols]
    return [
        {headers[c - 1]: ws.cell(r, c).value for c in cols}
        for r in range(2, ws.max_row + 1)
    ]

def jx_phtml(fasin,mid,cot:str):
    """Parse an Amazon product-detail HTML page into per-SKU records.

    Args:
        fasin: parent ASIN of the product page.
        mid: shop/merchant id copied into every record.
        cot (str): raw HTML of the detail page.

    Returns:
        dict: {'code': int, 'msg': str, 'data': list} where code is
            1 on success, 0 when nothing was parsed, -1 when the page has
            no Best Sellers Rank, and -2 when there are more than two SKU
            dimensions.
    """
    pdatas=[]
    # Best Sellers Rank — a page without one is rejected immediately.
    rank_match = pat_rank.search(cot)
    if not rank_match:
        return {'code':-1,'msg':'无排名','data':pdatas}
    rank = int(rank_match.group(1).replace(',', ''))
    #print(rank)  # e.g. 56004

    xpcot=etree.HTML(cot)
    # Image/video metadata embedded as a jQuery.parseJSON('...') blob.
    # NOTE(review): pat_img.search may return None on unexpected markup,
    # which would raise AttributeError here — confirm callers handle it.
    jsdata1=json.loads(pat_img.search(cot).group(1))
    colorImages=jsdata1['colorImages']
    if not colorImages:
        # Fallback: scrape thumbnail srcs and strip the _SRnn,nn_ size
        # token so the large variant of each image is requested.
        el_colorImages=xpcot.xpath('//div[@id="altImages"]//img/@src')
        ''
        colorImages={f'{fasin}':[{'large':pat_bigimg.sub( '_', src)} for src in el_colorImages if src.endswith('jpg') or src.endswith('png')]}
    # Title
    title=jsdata1['title']
    #print(title)
    # Price = product price plus delivery charge (when not FREE).
    el_cpjg=xpcot.xpath('//input[@id="twister-plus-price-data-price"]/@value')
    if el_cpjg:

        kdjg=0
        el_kdjg=xpcot.xpath('//span[@data-csa-c-content-id="DEXUnifiedCXPDM"]/@data-csa-c-delivery-price')
        if el_kdjg:
            if el_kdjg[0]!='FREE':
                kdjg=float(pat_price.search(el_kdjg[0]).group(1))
        
        price=float(el_cpjg[0])+kdjg
    else:
        # No price found on the page: fall back to a fixed 9.99.
        print(f'ASIN:{fasin},无价格,直接定价9.99')
        price=9.99
    #print(price)

    # Product details grid -> {name: value}
    xxcc={}
    el_cs=xpcot.xpath('//div[@class="a-fixed-left-grid product-facts-detail"]')
    for elit in el_cs:
        ctxts=elit.xpath('.//span[@class="a-color-base"]/text()')
        # Each detail row is expected to hold exactly two spans: name, value.
        cname,cvalue=[txt.strip() for txt in ctxts]
        xxcc[cname]=cvalue
    #print(xxcc)

    # Description: "About this item" bullets plus the long description.
    decstr=''
    aboutitem=xpcot.xpath('//span[@class="a-list-item a-size-base a-color-base"]/text()')
    decstr+='\n'.join(aboutitem)
    pdeclong=''
    el_pdec=xpcot.xpath('//div[@id="productDescription"]//p//text()')
    if el_pdec:
        pdeclong=''.join(el_pdec)
    if pdeclong:
        decstr=decstr+'\n'+pdeclong
    #print(f'dec:{decstr}')

    # Star rating and review count.
    rating_star=0
    cmt_count=0
    el_rating=xpcot.xpath('//div[@id="detailBullets_averageCustomerReviews"]')
    if el_rating:
        el_rating_star=el_rating[0].xpath('.//span[@id="acrPopover"]//span[@class="a-size-base a-color-base"]/text()')
        rating_star=float(el_rating_star[0].strip())

        el_px=el_rating[0].xpath('.//span[@id="acrCustomerReviewText"]/text()')
        if pat_rating.search(el_px[0]):
            pxstr=pat_rating.search(el_px[0]).group(1)
            px_count=int(pxstr.replace(',',''))
            # Rating total divided by 8 — presumably an estimate of written
            # reviews from the ratings count; confirm intent.
            cmt_count=px_count//8
        #print(el_px[0])
    # First product video URL, if any.
    vurl=None
    vds=jsdata1['videos']
    if len(vds)>0:
        vurl=vds[0]['url']
    # Template record shared (shallow-copied) by every SKU row below.
    ydata={
            'itemid':fasin,
            'shopid':mid,
            'bsr':rank,
            'pname':title.replace('&#34;', '"').replace('&#39;', "'"),
            'price':price,
            'skupirce':price,
            'rating_star':rating_star,
            'cmt_count':cmt_count,
            'pdec':decstr.replace('&#34;', '"').replace('&#39;', "'"),
            'attributes':json.dumps(xxcc),
            'video':vurl,
            'currency':'USD',
            'cjTime':int(time.time()),
            'datatype':2
            
        }
    # Twister variation data: present only on multi-SKU listings.
    el_size=patSizes.search(cot)
    if el_size:
        size_str=patSizes.search(cot).group(1)
        # Strip trailing commas before ]/} and unescape \' so the JS object
        # literal becomes valid JSON.
        size_str=re.sub(r',[\s]*([\]\}])', r'\1', size_str).replace("\\'", "'")

        jsdata2=json.loads(size_str)
        skutypelist=jsdata2['dimensionsDisplaySubType']
        dimensionValues=jsdata2['dimensionValuesDisplayData']

            
        # Positions of the IMAGE and TEXT dimensions within each SKU tuple.
        img_ind=skutypelist.index('IMAGE') if 'IMAGE' in skutypelist else None
        txt_ind=skutypelist.index('TEXT') if 'TEXT' in skutypelist else None

        for skuid,skuv in dimensionValues.items():
            cydata = copy.copy(ydata)
            sku2=None

            if len(skuv)==1:
                # Single-dimension SKU: its value doubles as the image key.
                sku1=skuv[0]
                # colorImages keys escape "/" as "\/".
                imgkey=sku1.replace("/", "\\/")
                
                if img_ind is not None:
                    lbimgs=[imgttt['large'] for imgttt in colorImages[imgkey]]
                else:
                    # No IMAGE dimension: fall back to the first image set.
                    lbimgs=[imgttt['large'] for imgttt in list(colorImages.values())[0]]
            
            elif len(skuv)==2:
                if img_ind is not None:
                    sku1=skuv[img_ind]
                    sku2=skuv[1 if img_ind==0 else 0]
                    escaped_sku1=sku1.replace("/", "\\/")
                    escaped_sku2 = sku2.replace("/", "\\/")
                    # The image key may be just the IMAGE value, or both
                    # values joined in either order — probe all three forms.
                    imgkey=escaped_sku1
                    if not colorImages.get(imgkey):
                        imgkey=f'{escaped_sku2} {escaped_sku1}'
                    if not colorImages.get(imgkey):
                        imgkey=f'{escaped_sku1} {escaped_sku2}'
                    lbimgs=[imgttt['large'] for imgttt in colorImages[imgkey]]
                else:
                    # No IMAGE dimension: share the first image set and order
                    # sku1/sku2 so the TEXT dimension lands in sku2.
                    lbimgs=[imgttt['large'] for imgttt in list(colorImages.values())[0]]
                    if txt_ind is not None:
                        sku1=skuv[1 if txt_ind==0 else 0]
                        sku2=skuv[txt_ind]
                    else:
                        sku1=skuv[0]
                        sku2=skuv[1]
            else:
                # More than two SKU dimensions is unsupported.
                return {'code':-2,'msg':'超过2级sku','data':pdatas}

            zt=lbimgs[0]
            skuimg=lbimgs[0]
            cydata['skuid']=skuid
            cydata['sku1']=sku1
            cydata['sku2']=sku2
            cydata['image']=zt
            cydata['skuimg']=skuimg
            cydata['images']=json.dumps(lbimgs)
            cydata['url']=f'https://www.amazon.com/dp/{skuid}?th=1&psc=1'
            pdatas.append(cydata)

    else:
        # Single-SKU listing: emit one synthetic 'One Style' record.
        print(f'ASIN:{fasin},单sku')
        cydata = copy.copy(ydata)
        sku1='One Style'
        sku2=None
        lbimgs=[imgttt['large'] for imgttt in list(colorImages.values())[0]]
        zt=lbimgs[0]
        skuimg=lbimgs[0]
        cydata['skuid']=fasin
        cydata['sku1']=sku1
        cydata['sku2']=sku2
        cydata['image']=zt
        cydata['skuimg']=skuimg
        cydata['images']=json.dumps(lbimgs)
        cydata['url']=f'https://www.amazon.com/dp/{fasin}?th=1&psc=1'
        pdatas.append(cydata)

    if len(pdatas)>0:
        return {'code':1,'msg':'成功解析','data':pdatas}
    else:

        return {'code':0,'msg':'解析为空','data':pdatas}


def jx_phtml2(fasin,mid,cot:str):
    """Parse an Amazon product page into a single 'One Style' SKU record.

    Unlike jx_phtml, this variant tolerates a missing embedded image JSON
    blob (falling back to scraped thumbnails), skips rank/price extraction,
    and always emits exactly one record.

    Args:
        fasin: ASIN of the product page.
        mid: shop/merchant id copied into the record.
        cot (str): raw HTML of the detail page.

    Returns:
        dict: the assembled product record.
    """
    cur_time=int(time.time())
    xpcot=etree.HTML(cot)
    jsdata1=None

    # Try the embedded jQuery.parseJSON blob first; fall back to scraping.
    try:
        jsdata1=json.loads(pat_img.search(cot).group(1))
    except Exception as e:
        print(f'{fasin},解析失败,直接获取')
    colorImages=None
    if jsdata1:
        colorImages=jsdata1['colorImages']

    if not colorImages:
        # Fallback: scrape thumbnail srcs and strip the _SRnn,nn_ size token
        # so the large variant of each image is requested.
        el_colorImages=xpcot.xpath('//div[@id="altImages"]//img/@src')
        colorImages={f'{fasin}':[{'large':pat_bigimg.sub( '_', src)} for src in el_colorImages if src.endswith('jpg') or src.endswith('png')]}

    title=xpcot.xpath('//span[@id="productTitle"]/text()')[0]

    # Product details grid -> {name: value}
    xxcc={}
    el_cs=xpcot.xpath('//div[@class="a-fixed-left-grid product-facts-detail"]')
    for elit in el_cs:
        ctxts=elit.xpath('.//span[@class="a-color-base"]/text()')
        # Each detail row is expected to hold exactly two spans: name, value.
        cname,cvalue=[txt.strip() for txt in ctxts]
        xxcc[cname]=cvalue
    #print(xxcc)

    # Description: "About this item" bullets plus the long description.
    decstr=''
    aboutitem=xpcot.xpath('//span[@class="a-list-item a-size-base a-color-base"]/text()')
    decstr+='\n'.join(aboutitem)
    pdeclong=''
    el_pdec=xpcot.xpath('//div[@id="productDescription"]//p//text()')
    if el_pdec:
        pdeclong=''.join(el_pdec)
    if pdeclong:
        decstr=decstr+'\n'+pdeclong
    #print(f'dec:{decstr}')

    # Star rating and review count.
    rating_star=0
    cmt_count=0
    el_rating=xpcot.xpath('//div[@id="detailBullets_averageCustomerReviews"]')
    if el_rating:
        el_rating_star=el_rating[0].xpath('.//span[@id="acrPopover"]//span[@class="a-size-base a-color-base"]/text()')
        rating_star=float(el_rating_star[0].strip())

        el_px=el_rating[0].xpath('.//span[@id="acrCustomerReviewText"]/text()')
        if pat_rating.search(el_px[0]):
            pxstr=pat_rating.search(el_px[0]).group(1)
            px_count=int(pxstr.replace(',',''))
            # Rating total divided by 8 — presumably an estimate of written
            # reviews from the ratings count; confirm intent.
            cmt_count=px_count//8
        #print(el_px[0])
    # First product video URL, if any (only available via the JSON blob).
    vurl=None
    if jsdata1:
        vds=jsdata1['videos']
        if len(vds)>0:
            vurl=vds[0]['url']
    ydata={
            'itemid':fasin,
            'shopid':mid,
            'pname':title.replace('&#34;', '"').replace('&#39;', "'").strip(),
            'rating_star':rating_star,
            'cmt_count':cmt_count,
            'pdec':decstr.replace('&#34;', '"').replace('&#39;', "'"),
            'attributes':json.dumps(xxcc),
            'video':vurl,
            'currency':'USD',
            'datatype':2,
            'PingTai':'美国亚马逊',
            # Bug fix: the original literal listed 'cjTime' twice
            # (int(time.time()) and cur_time); only one key is kept.
            'cjTime':cur_time
        }

    # Single synthetic 'One Style' SKU; icon thumbnails are filtered out.
    cydata = copy.copy(ydata)
    sku1='One Style'
    sku2=None
    lbimgs=[imgttt['large'] for imgttt in list(colorImages.values())[0] if 'icon' not in imgttt['large']]
    zt=lbimgs[0]
    skuimg=lbimgs[0]
    cydata['skuid']=fasin
    cydata['sku1']=sku1
    cydata['sku2']=sku2
    cydata['image']=zt
    cydata['skuimg']=skuimg
    cydata['images']=json.dumps(lbimgs)
    cydata['url']=f'https://www.amazon.com/dp/{fasin}?th=1&psc=1'

    return cydata